################################################################################
################################################################################
####### THIS SCRIPT CREATES A CONTROL VARIABLE MEASURING NATIONAL OPPOSITION ###
############## Wrangling with ParlGov Data and then Merging #####################

###### CREATE A VARIABLE THAT ACCOUNTS FOR BEING PART OF NATIONAL GOVERNING COALITION #####

library(haven)
library(readxl)
library(tidyverse)
library(lme4)
library(texreg)
library(sjPlot)
library(sjstats)
library(fixest)
library(ggplot2)
library(ggeffects)
library(lubridate)

################################################################################

### ParlGov party table -> lookup from ParlGov party_id to CHES identifier
party_set <- read.csv("parlgov/view_party.csv")

# Keep only parties that have a CHES identifier (column `chess`) and reduce the
# table to the two columns needed for the later join on party_id.
party_ches <- party_set[!is.na(party_set$chess), c("party_id", "chess")]

# Sanity check: prints TRUE if any (party_id, chess) pair occurs more than once.
any(duplicated(party_ches))



### ParlGov cabinet table (read from view_cabinet.csv)
gov_set <- read.csv("parlgov/view_cabinet.csv")

# Convert both date columns with date() (lubridate is attached at the top of
# the script) so that they can be compared and joined as dates.
gov_set[c("election_date", "start_date")] <-
  lapply(gov_set[c("election_date", "start_date")], date)

# Keep only cabinets that took office after 1 January 1994; rows whose start
# date could not be parsed (NA) are dropped as well.
gov_set <- gov_set[which(gov_set$start_date > "1994-01-01"), ]


# Country names used further down both to filter the cabinet data
# (matched against `country_name`) and to build the daily country panel.
# 28 entries, in alphabetical order.
eu_countries <- c(
  "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
  "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
  "Hungary", "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg",
  "Malta", "Netherlands", "Poland", "Portugal", "Romania", "Slovakia",
  "Slovenia", "Spain", "Sweden", "United Kingdom"
)

## Governing parties in EU countries
# Keep cabinet rows whose country is listed in eu_countries and whose
# cabinet_party flag equals 1 (governing parties only).
gov_EU <- gov_set[gov_set$country_name %in% eu_countries &
                    gov_set$cabinet_party == 1, ]
gov_EU_small <- gov_EU[, c("country_name", "start_date",
                           "party_id", "party_name_short")]

# Attach the CHES identifier to every governing party; parties without a match
# in party_ches keep NA in `chess`.
gov_EU_merged <- left_join(gov_EU_small, party_ches, by = "party_id")

#### Governing parties without CHES data, kept for manual inspection ####
check_missing <- gov_EU_merged[is.na(gov_EU_merged$chess), ]
### These are either parties from countries that are not in the CHES (Cyprus)
### or those that were very small when joining the government

# From here on only country, cabinet start date and CHES id are needed.
gov_EU_merged[c("party_id", "party_name_short")] <- NULL

### MAKE THIS WIDE SO THAT EVERY COALITION IS ONE ROW
# Number the parties within each cabinet (country x start date) and spread
# their CHES ids over one column per coalition slot. The slot columns are
# named "1", "2", ... at this point.
gov_EU_wide <- gov_EU_merged %>%
  group_by(start_date, country_name) %>%
  mutate(coa_member = row_number()) %>%
  pivot_wider(names_from = coa_member, values_from = chess)

### START DATE OF NEW COALITION IS END DATE OF OLD COALITION
# order_by makes the "next cabinet" lookup chronological within each country
# even if the input rows are not sorted by start date. Slot columns are renamed
# and filled by NAME rather than by position, so a different maximum coalition
# size can never rename or overwrite the wrong column.
# NOTE: the code further down refers to coa_ches1 ... coa_ches9 explicitly.
gov_EU_lead <- gov_EU_wide %>%
  group_by(country_name) %>%
  mutate(end_date = lead(start_date, n = 1, order_by = start_date)) %>%
  ungroup() %>%
  rename_with(function(nm) paste0("coa_ches", nm),
              .cols = matches("^[0-9]+$")) %>%
  mutate(
    ### WHEN MERGING, IT WORKS BETTER WITHOUT NAs
    # Empty coalition slots (and parties without CHES id) become 0.
    across(starts_with("coa_ches"),
           function(slot) replace(slot, is.na(slot), 0)),
    ### TAKE A DATE AFTER THE STUDY PERIOD AS END DATE OF THE LAST CABINET
    ### (exact value does not matter, the daily panel ends earlier)
    end_date = replace(end_date, is.na(end_date), date("2023-06-06"))
  )


#### BUILD A DAILY COUNTRY PANEL AND ATTACH THE CABINET IN OFFICE ON EACH DAY

# Bounds of the daily panel.
start_date <- as.Date("1999-01-01")
end_date <- as.Date("2020-01-01")
date_sequence <- seq(start_date, end_date, by = "day")

# One row per country and calendar day.
dataset <- data.frame(
  daily_date = rep(date_sequence, times = length(eu_countries)),
  country_name = rep(eu_countries, each = length(date_sequence))
)

# Pair every day with every cabinet of the same country, then keep the pairs
# where the day falls into the cabinet's term.
# - `.data$start_date` / `.data$end_date` are the CABINET's dates coming from
#   gov_EU_lead, not the panel bounds defined above (which share the names).
# - The term is the half-open interval [start_date, end_date): the day a new
#   cabinet takes office belongs to the new cabinet only. With a closed
#   interval that day matched both cabinets and the choice between them was
#   left to the row order of the join.
merged_df <- dataset %>%
  right_join(gov_EU_lead, by = "country_name") %>%
  filter(daily_date >= .data$start_date, daily_date < .data$end_date)

# Keep the keys (renamed to match the speech data) and the coalition slots;
# selecting by name drops the cabinet start/end dates.
merged_df <- merged_df %>%
  select(date = daily_date, country = country_name, starts_with("coa_ches"))

# Safety net: no country-day should occur twice any more. Print the number of
# offending rows (expected 0) and keep the first row of each country-day.
is_dup_day <- duplicated(merged_df[, c("date", "country")])
sum(is_dup_day)
merged_df <- merged_df[!is_dup_day, ]
# This is a dataset that contains, for every day between 1999 and 2020, who was
# in government (CHES IDs) at that date


#### Load speech data
# load() restores the objects stored in the file; the code below relies on it
# providing `EP_debates` with the columns `country`, `date` and `party_id`.
load("Data/EP_debates_11032023.Rdata")

# Attach the coalition in office in the speaker's country on the speech date.
EP_merged <- left_join(EP_debates, merged_df, by = c("country", "date"))

# Compare the speaker's party with EVERY coalition slot. The slot columns are
# collected by name so that none can be skipped or repeated by accident (the
# hand-written chain tested coa_ches2 three times and never coa_ches4/5).
coa_cols <- grep("^coa_ches[0-9]+$", names(EP_merged), value = TRUE)
stopifnot(length(coa_cols) > 0)

# Element-wise OR over all slots. As with a chain of `|`, a row is TRUE if any
# slot matches, NA if no slot matches but a comparison is NA, FALSE otherwise.
in_government <- Reduce(
  `|`,
  lapply(coa_cols, function(col) EP_merged$party_id == EP_merged[[col]])
)

EP_merged$nat_opp <- ifelse(in_government, 0, 1)
#### takes the value 1 for being in national opposition at a certain day, 0 when being in government
#### NAs when party is not in CHES
### quality checked in separate script
EP_debates <- EP_merged
save(EP_debates, file = "Data/EP_debates_07062023.Rdata")
